Epimutable genes were selected as MAplot calls with padj<1e-4.
setwd("/Users/ab6415/Bioinformatics/Analysis/EpiMALs_PAPER_ANALYSES/03_Figure1/")
library(MASS)
library(ggplot2)
library(viridis)
## Loading required package: viridisLite
library(msir)
## Package 'msir' version 1.3.2
## Type 'citation("msir")' for citing this R package in publications.
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
theme_set(theme_bw(base_size = 16))
ttg_DEseqnorm<-read.table("../02_Normalised_counts/22G_DEseqnorm_counts_averaged.txt"); ttg_DEseqnorm_gens25_100<-ttg_DEseqnorm[,25:47]
ttg_MAplot_data<-read.table("../03_Figure1/MAplot_filtering_22Gs_gens25100_p1e-2.txt",header=TRUE)
ttg_MAplot_data_p1Eminus4<-ttg_MAplot_data[which(ttg_MAplot_data$padj<1e-4),]
epimutable_genes<-unique(ttg_MAplot_data_p1Eminus4$ID)
length(epimutable_genes)
## [1] 442
Correlation analysis on the subset of epimutable genes showed decreasing correlations with distance in generations. This analysis reveals C100, I100, K100 and L100 as outliers - lines showing much lower correlation with the rest of samples. The source of this is difference is unclear, when growing the lines they did not seem to be sick compared to the rest. Including those in the correlation analysis makes the correlations scale with the distance in generations better, but this is only driven by these 4.
When removing these 4 samples, the difference in correlation is significant up the the 50-75 generation comparison, and it stabilises beyond that, consistent with the lack of long-term epigenetic inheritance overall.
#load matrix with the number of generations separating each pair of samples
numgenerations<-read.table("numgenerations_matrix.txt")
#function to plot the correlation coefficients as a function of the distance between samples
cor_analysis<-function(cpm_data,maplot_calls,heatmapfilename,boxplotfilename,dotplotfilename){
filt_cpm_data<-cpm_data[which(rownames(cpm_data) %in% maplot_calls$ID),]
cor_matrix<-cor(log2(filt_cpm_data+1))
numchanges_matrix<-matrix(nrow=nrow(cor_matrix),ncol=ncol(cor_matrix))
rownames(numchanges_matrix)<-rownames(cor_matrix); colnames(numchanges_matrix)<-colnames(cor_matrix)
for (row in rownames(numchanges_matrix)){
for (col in colnames(numchanges_matrix)){
numchanges_matrix[row,col]<-nrow(maplot_calls[which(maplot_calls$line1==row & maplot_calls$line2==col),])
}
}
tit<-"442 genes, p<1e-4"
numgen_reordered<-numgenerations[rownames(cor_matrix),colnames(cor_matrix)]
heatmap.2(cor_matrix,trace="none",dendrogram = "none",Rowv = FALSE,Colv = FALSE,main = tit)
pdf(heatmapfilename)
heatmap.2(cor_matrix,trace="none",dendrogram = "none",Rowv = FALSE,Colv = FALSE,main = tit)
dev.off()
numgen_vector<-numgen_reordered[upper.tri(numgen_reordered)]
cor_vector<-cor_matrix[upper.tri(cor_matrix)]
numchanges_vector<-numchanges_matrix[upper.tri(numchanges_matrix)]
numgen_df<-data.frame(ng_vect=numgen_vector,
cor_vect=cor_vector,
nch_vect=numchanges_vector)
numgen_df$ng_vect<-as.factor(numgen_df$ng_vect)
cors_boxplots <- ggplot(numgen_df, aes(x=ng_vect, y=cor_vect,fill=ng_vect))+ ggtitle(tit) + xlab("generations separating lines") + ylab("correlation coefficient of DE 22G set")+geom_boxplot()+theme_classic()+scale_fill_brewer(aesthetics =c("fill","color"),palette = "GnBu")
ggsave(plot=cors_boxplots,filename = boxplotfilename,dpi="retina")
print(cors_boxplots)
cors_dotplots <- ggplot(numgen_df,aes(x=ng_vect, y=cor_vect))+geom_dotplot(binaxis='y', stackdir='center',aes(fill=factor(ng_vect),color=factor(ng_vect)))+theme_classic()+ ggtitle(tit) + xlab("generations separating lines") + ylab("correlation coefficient of DE 22G set")+scale_fill_brewer(aesthetics =c("fill","color"),palette = "GnBu")+stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median,
geom = "crossbar", width = 0.5,color="black")
print(cors_dotplots)
ggsave(plot=cors_dotplots,filename = dotplotfilename,dpi="retina")
#numchanges_boxplots <- ggplot(numgen_df, aes(x=numgen_vector, y=numchanges_vector))+ ggtitle(tit) + xlab("generations separating lines") + ylab("number of DE22Gs")+geom_boxplot()
#print(numchanges_boxplots)
numgen_df$ng_vect<-as.numeric(as.character(numgen_df$ng_vect))
print(wilcox.test(numgen_df[which(numgen_df$ng_vect==25),"cor_vect"],
numgen_df[which(numgen_df$ng_vect==50),"cor_vect"]))
print(wilcox.test(numgen_df[which(numgen_df$ng_vect==50),"cor_vect"],
numgen_df[which(numgen_df$ng_vect==75),"cor_vect"]))
print(wilcox.test(numgen_df[which(numgen_df$ng_vect==75),"cor_vect"],
numgen_df[which(numgen_df$ng_vect==100),"cor_vect"]))
print(wilcox.test(numgen_df[which(numgen_df$ng_vect==100),"cor_vect"],
numgen_df[which(numgen_df$ng_vect==125),"cor_vect"]))
print(wilcox.test(numgen_df[which(numgen_df$ng_vect==125),"cor_vect"],
numgen_df[which(numgen_df$ng_vect==200),"cor_vect"]))
}
#including outlier lines C100,I100,K100,L100
cor_analysis(ttg_DEseqnorm_gens25_100,ttg_MAplot_data_p1Eminus4,
heatmapfilename="correlation_heatmap_withCIKL.pdf",
boxplotfilename="correlation_boxplots_withCIKL.pdf",
dotplotfilename = "correlation_dotplots_withCIKL.pdf")
## Saving 7 x 5 in image
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
## Saving 7 x 5 in image
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
##
## Wilcoxon rank sum test with continuity correction
##
## data: numgen_df[which(numgen_df$ng_vect == 25), "cor_vect"] and numgen_df[which(numgen_df$ng_vect == 50), "cor_vect"]
## W = 465, p-value = 0.005314
## alternative hypothesis: true location shift is not equal to 0
##
##
## Wilcoxon rank sum test with continuity correction
##
## data: numgen_df[which(numgen_df$ng_vect == 50), "cor_vect"] and numgen_df[which(numgen_df$ng_vect == 75), "cor_vect"]
## W = 521, p-value = 0.0001762
## alternative hypothesis: true location shift is not equal to 0
##
##
## Wilcoxon rank sum test
##
## data: numgen_df[which(numgen_df$ng_vect == 75), "cor_vect"] and numgen_df[which(numgen_df$ng_vect == 100), "cor_vect"]
## W = 65, p-value = 0.7969
## alternative hypothesis: true location shift is not equal to 0
##
##
## Wilcoxon rank sum test with continuity correction
##
## data: numgen_df[which(numgen_df$ng_vect == 100), "cor_vect"] and numgen_df[which(numgen_df$ng_vect == 125), "cor_vect"]
## W = 619, p-value = 0.9031
## alternative hypothesis: true location shift is not equal to 0
##
##
## Wilcoxon rank sum test with continuity correction
##
## data: numgen_df[which(numgen_df$ng_vect == 125), "cor_vect"] and numgen_df[which(numgen_df$ng_vect == 200), "cor_vect"]
## W = 3656, p-value = 0.0293
## alternative hypothesis: true location shift is not equal to 0
#removing outlier lines C100,I100,K100,L100
cor_analysis(ttg_DEseqnorm_gens25_100[,-c(15,20,22,23)],ttg_MAplot_data_p1Eminus4,
heatmapfilename="correlation_heatmap_withoutCIKL.pdf",
boxplotfilename="correlation_boxplots_withoutCIKL.pdf",
dotplotfilename = "correlation_dotplots_withoutCIKL.pdf")
## Saving 7 x 5 in image
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
## Saving 7 x 5 in image
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
##
## Wilcoxon rank sum test with continuity correction
##
## data: numgen_df[which(numgen_df$ng_vect == 25), "cor_vect"] and numgen_df[which(numgen_df$ng_vect == 50), "cor_vect"]
## W = 465, p-value = 0.005314
## alternative hypothesis: true location shift is not equal to 0
##
##
## Wilcoxon rank sum test with continuity correction
##
## data: numgen_df[which(numgen_df$ng_vect == 50), "cor_vect"] and numgen_df[which(numgen_df$ng_vect == 75), "cor_vect"]
## W = 301, p-value = 0.0163
## alternative hypothesis: true location shift is not equal to 0
##
##
## Wilcoxon rank sum test
##
## data: numgen_df[which(numgen_df$ng_vect == 75), "cor_vect"] and numgen_df[which(numgen_df$ng_vect == 100), "cor_vect"]
## W = 28, p-value = 0.7104
## alternative hypothesis: true location shift is not equal to 0
##
##
## Wilcoxon rank sum test with continuity correction
##
## data: numgen_df[which(numgen_df$ng_vect == 100), "cor_vect"] and numgen_df[which(numgen_df$ng_vect == 125), "cor_vect"]
## W = 264, p-value = 0.7431
## alternative hypothesis: true location shift is not equal to 0
##
##
## Wilcoxon rank sum test with continuity correction
##
## data: numgen_df[which(numgen_df$ng_vect == 125), "cor_vect"] and numgen_df[which(numgen_df$ng_vect == 200), "cor_vect"]
## W = 681, p-value = 0.6143
## alternative hypothesis: true location shift is not equal to 0
For the set of epimutable genes, we used unsupervised k-means clustering to define 22G expression levels as low (1) or high (2) for all samples in the dataset. Then using this data, we calculated the total number of changes that we see between samples.
When we include the outlier samples, we see that there is a significant difference between the observed number of changes within the 25 generation samples and the 100th generation samples, in other words, the 100th generation samples seem to be more divergent in their 22G expression states.
ttg_DEseqnorm_epimutable_genes<-ttg_DEseqnorm[which(rownames(ttg_DEseqnorm) %in% epimutable_genes),]
kmeans_row<-function(vector){
kmc<-kmeans(x = as.numeric(vector),centers=2)
high<-kmc$cluster[which.is.max(vector)]
if (high==2){return(kmc$cluster)
}else{return(abs(kmc$cluster-3))}
}
library(nnet)
ttg_segmentation_epimutable_genes<-matrix(0,nrow=nrow(ttg_DEseqnorm_epimutable_genes),ncol = 47)
colnames(ttg_segmentation_epimutable_genes)<-colnames(ttg_DEseqnorm_epimutable_genes)
rownames(ttg_segmentation_epimutable_genes)<-rownames(ttg_DEseqnorm_epimutable_genes)
for (gene in rownames(ttg_DEseqnorm_epimutable_genes)){
ttg_segmentation_epimutable_genes[gene,]<-kmeans_row(ttg_DEseqnorm_epimutable_genes[gene,])
}
heatmap.2(as.matrix(ttg_segmentation_epimutable_genes[,25:47]),col = colorRampPalette(c("#67a9cf","#de2d26")),Colv=FALSE,trace="none",dendrogram = "none")
heatmap.2(cor(as.matrix(ttg_segmentation_epimutable_genes[,25:47])),Colv=FALSE,trace="none",dendrogram = "none")
diffs<-c()
line1<-c()
line2<-c()
distances<-c()
for (l1 in colnames(ttg_segmentation_epimutable_genes[,25:47])){
for (l2 in colnames(ttg_segmentation_epimutable_genes[,25:47])){
diffs<-c(diffs,sum(abs(ttg_segmentation_epimutable_genes[,l1]-ttg_segmentation_epimutable_genes[,l2])))
line1<-c(line1,l1); line2<-c(line2,l2)
if (l1=="PMA"){l1<-"P0"}
if (l2=="PMA"){l2<-"P0"}
lineage_1<-substr(l1,start = 1,stop = 1)
gen_1<-as.numeric(substr(l1,start=2,stop=nchar(l1)))
lineage_2<-substr(l2,start = 1,stop = 1)
gen_2<-as.numeric(substr(l2,start=2,stop=nchar(l2)))
if (lineage_1!=lineage_2){
distance=abs(gen_1+gen_2)
}
else if (lineage_1==lineage_2){
distance=abs(gen_1-gen_2)
}
distances<-c(distances,distance)
if (l1=="P0"){l1<-"PMA"}
if (l2=="P0"){l2<-"PMA"}
}}
From the k-means segmented data, we estimate epimutation totals in two alternative ways:
In both estimations, we do not see clear differences between the total epimutation numbers in gen25 and gen100 lines - consistent with no long-term inheritance.
In this case, including the outlier lines has a large effect in the totals.
numchanges_df<-data.frame(num_changes=diffs,line1=line1,line2=line2,distance=distances)
ggplot(numchanges_df)+geom_boxplot(aes(y=num_changes,x=factor(distance),fill=factor(distance)))+theme_classic()+scale_fill_brewer(aesthetics =c("fill"),palette = "GnBu")
ggsave("number_of_changes_boxplot_gens25_100_withCIKL.pdf",dpi="retina")
## Saving 7 x 5 in image
ggplot(numchanges_df)+geom_boxplot(aes(y=num_changes/nrow(ttg_segmentation_epimutable_genes),x=factor(distance)))
ggsave("proportion_of_changes_boxplot_gens25_100_withCIKL.pdf",dpi="retina")
## Saving 7 x 5 in image
numchanges_df_50_200<-numchanges_df[which(numchanges_df$distance==50 | numchanges_df$distance==200),]
ggplot(numchanges_df_50_200)+geom_boxplot(aes(y=num_changes,x=factor(distance),fill=factor(distance)))+ scale_fill_manual(values=c("#66c2a5", "#8da0cb"))+theme_classic()
ggsave("number_of_changes_boxplot_withinbatch_withCIKL.pdf",dpi="retina")
## Saving 7 x 5 in image
ggplot(numchanges_df_50_200,aes(x=factor(distance),y=num_changes,fill=factor(distance),color=factor(distance)))+geom_dotplot(binaxis = "y", stackdir = "center")+ stat_summary(fun.y=median, geom="point", shape=19,size=5,col="black")+scale_fill_manual(values = c("#a8ddb5","#43a2ca"),aesthetics = c("fill","color"))
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
ggsave("number_of_changes_dotplot_withinbatch_withCIKL.pdf",dpi="retina")
## Saving 7 x 5 in image
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
#testing differences in the number of changes
wilcox.test(numchanges_df[which(numchanges_df$distance==50),"num_changes"],
numchanges_df[which(numchanges_df$distance==200),"num_changes"])
##
## Wilcoxon rank sum test with continuity correction
##
## data: numchanges_df[which(numchanges_df$distance == 50), "num_changes"] and numchanges_df[which(numchanges_df$distance == 200), "num_changes"]
## W = 2366, p-value = 5.954e-15
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(numchanges_df[which(numchanges_df$distance==25),"num_changes"],
numchanges_df[which(numchanges_df$distance==50),"num_changes"])
##
## Wilcoxon rank sum test with continuity correction
##
## data: numchanges_df[which(numchanges_df$distance == 25), "num_changes"] and numchanges_df[which(numchanges_df$distance == 50), "num_changes"]
## W = 744, p-value = 0.004453
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(numchanges_df[which(numchanges_df$distance==50),"num_changes"],
numchanges_df[which(numchanges_df$distance==75),"num_changes"])
##
## Wilcoxon rank sum test with continuity correction
##
## data: numchanges_df[which(numchanges_df$distance == 50), "num_changes"] and numchanges_df[which(numchanges_df$distance == 75), "num_changes"]
## W = 350, p-value = 1.51e-07
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(numchanges_df[which(numchanges_df$distance==75),"num_changes"],
numchanges_df[which(numchanges_df$distance==100),"num_changes"])
## Warning in wilcox.test.default(numchanges_df[which(numchanges_df$distance == :
## cannot compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: numchanges_df[which(numchanges_df$distance == 75), "num_changes"] and numchanges_df[which(numchanges_df$distance == 100), "num_changes"]
## W = 190, p-value = 0.2262
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(numchanges_df[which(numchanges_df$distance==100),"num_changes"],
numchanges_df[which(numchanges_df$distance==125),"num_changes"])
##
## Wilcoxon rank sum test with continuity correction
##
## data: numchanges_df[which(numchanges_df$distance == 100), "num_changes"] and numchanges_df[which(numchanges_df$distance == 125), "num_changes"]
## W = 2706, p-value = 0.3617
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(numchanges_df[which(numchanges_df$distance==125),"num_changes"],
numchanges_df[which(numchanges_df$distance==200),"num_changes"])
##
## Wilcoxon rank sum test with continuity correction
##
## data: numchanges_df[which(numchanges_df$distance == 125), "num_changes"] and numchanges_df[which(numchanges_df$distance == 200), "num_changes"]
## W = 13114, p-value = 0.2147
## alternative hypothesis: true location shift is not equal to 0
We can see the 4 outlier lines on the top part of the 100th gen epimutation totals distribution, but the median is still similar between 25th and 100th generation lines.
ttg_segmentation_epimutable_genes_25_100<-ttg_segmentation_epimutable_genes[,25:47]
ttg_segmentation_epimutable_genes_25_100<-data.frame(ttg_segmentation_epimutable_genes_25_100)
epimuts_maintained<-function(df,line1,line2){
epimuts_25<-abs(df[,line1]-df[,"PMA"])
epimuts_100<-abs(df[,line2]-df[,"PMA"])
print(c(line1,length(which(epimuts_25==1)),line2,length(which(epimuts_100==1)),"maintained",length(which(epimuts_25==1 & epimuts_100==1))))
}
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"A25","A100")
## [1] "A25" "66" "A100" "116" "maintained"
## [6] "39"
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"B25","B100")
## [1] "B25" "54" "B100" "112" "maintained"
## [6] "25"
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"C25","C100")
## [1] "C25" "69" "C100" "166" "maintained"
## [6] "41"
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"D25","D100")
## [1] "D25" "45" "D100" "93" "maintained"
## [6] "20"
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"F25","F100")
## [1] "F25" "56" "F100" "87" "maintained"
## [6] "33"
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"G25","G100")
## [1] "G25" "93" "G100" "121" "maintained"
## [6] "56"
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"H25","H100")
## [1] "H25" "85" "H100" "122" "maintained"
## [6] "59"
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"I25","I100")
## [1] "I25" "67" "I100" "217" "maintained"
## [6] "40"
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"J25","J100")
## [1] "J25" "71" "J100" "118" "maintained"
## [6] "47"
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"K25","K100")
## [1] "K25" "80" "K100" "178" "maintained"
## [6] "44"
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"L25","L100")
## [1] "L25" "94" "L100" "141" "maintained"
## [6] "43"
hist(apply(ttg_segmentation_epimutable_genes_25_100,FUN = mean,MARGIN=1))
hist(apply(ttg_segmentation_epimutable_genes_25_100,FUN = median,MARGIN=1))
epimuts_maintained_median<-function(df,line1,line2){
median<-apply(ttg_segmentation_epimutable_genes_25_100,FUN = median,MARGIN=1)
epimuts_25<-abs(df[,line1]-median)
epimuts_100<-abs(df[,line2]-median)
print(c(line1,length(which(epimuts_25==1)),line2,length(which(epimuts_100==1)),length(which(epimuts_25==1 & epimuts_100==1))))
}
epimut_persistence<-rbind(epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"A25","A100"),
epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"B25","B100"),
epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"C25","C100"),
epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"D25","D100"),
epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"F25","F100"),
epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"G25","G100"),
epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"H25","H100"),
epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"I25","I100"),
epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"J25","J100"),
epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"K25","K100"),
epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"L25","L100"))
## [1] "A25" "65" "A100" "67" "14"
## [1] "B25" "85" "B100" "81" "25"
## [1] "C25" "66" "C100" "121" "17"
## [1] "D25" "76" "D100" "46" "12"
## [1] "F25" "69" "F100" "52" "22"
## [1] "G25" "74" "G100" "72" "22"
## [1] "H25" "56" "H100" "71" "19"
## [1] "I25" "50" "I100" "176" "11"
## [1] "J25" "58" "J100" "67" "15"
## [1] "K25" "73" "K100" "147" "25"
## [1] "L25" "79" "L100" "100" "15"
colnames(epimut_persistence)<-c("line_25","epimutations_25","line_100","epimutations_100","shared epimutations")
write.table(epimut_persistence,file="persistence_data.txt")
library(reprex)
epimut_persistence<-data.frame(epimut_persistence[,c(2,4,5)])
epimut_persistence$epimutations_25<-as.numeric(as.character(epimut_persistence$epimutations_25))
epimut_persistence$epimutations_100<-as.numeric(as.character(epimut_persistence$epimutations_100))
epimut_persistence$shared.epimutations<-as.numeric(as.character(epimut_persistence$shared.epimutations))
epimut_persistence$epimutations_25_unique<-epimut_persistence$epimutations_25-epimut_persistence$shared.epimutations
epimut_persistence$epimutations_100_unique<-epimut_persistence$epimutations_100-epimut_persistence$shared.epimutations
mat<-t(as.matrix(epimut_persistence[,c(3,4,5)]))[c(2,3,1),]
library(reshape2)
mat<-melt(mat)
mat$Var1<-factor(mat$Var1,levels(mat$Var1)[c(3,2,1)])
ggplot(mat)+
geom_col(aes(x=Var2,y=value,group=Var1,fill=Var1))+
theme_classic()+
ylab("number of epimutations")+
xlab("lineage")+
scale_fill_manual(values = c("orange","#43a2ca","#a8ddb5"))
ggsave("epimutation_persistence_summary.pdf")
## Saving 7 x 5 in image
mat2<-melt(epimut_persistence[,c(1,2)])
## No id variables; using all as measure variables
ggplot(mat2,aes(x=factor(variable),y=value,fill=factor(variable),color=factor(variable)))+geom_dotplot(binaxis = "y", stackdir = "center")+ stat_summary(fun.y=median, geom="point", shape=1,size=2,col="black")+scale_fill_manual(values = c("#a8ddb5","#43a2ca"),aesthetics = c("fill","color"))+theme_classic()
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
ggsave("epimutation_totals.pdf")
## Saving 7 x 5 in image
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
median(epimut_persistence$epimutations_25)
## [1] 69
median(epimut_persistence$epimutations_100)
## [1] 72
wilcox.test(epimut_persistence$epimutations_25,epimut_persistence$epimutations_100)
## Warning in wilcox.test.default(epimut_persistence$epimutations_25,
## epimut_persistence$epimutations_100): cannot compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: epimut_persistence$epimutations_25 and epimut_persistence$epimutations_100
## W = 44, p-value = 0.2933
## alternative hypothesis: true location shift is not equal to 0
When we do the same analysis removing the 100th generation sample outliers, the amount of observed differences between 25th and 100th generation samples from the pairwise analysis is not significantly different.
Now it seems that the 75, 100 and 125 distances are higher than the rest, however this is an artefact since these three are between-batch comparisons (comparing the 25th and 100th generation samples that were grown and sequenced on separate experiments). So it makes more sense here to compare within batches (distance 50 and distance 200). In this case there is no difference in the number of changes observed between lines.
CIKL<-c("C100","I100","K100","L100")
ttg_MAplot_data_p1Eminus4_noCIKL<-ttg_MAplot_data_p1Eminus4[which(!(ttg_MAplot_data_p1Eminus4$line1 %in% CIKL) &
!(ttg_MAplot_data_p1Eminus4$line2 %in% CIKL)),]
epimutable_genes_noCIKL<-unique(ttg_MAplot_data_p1Eminus4_noCIKL$ID)
length(epimutable_genes_noCIKL)
## [1] 307
ttg_DEseqnorm_epimutable_genes_noCIKL<-ttg_DEseqnorm[which(rownames(ttg_DEseqnorm) %in% epimutable_genes_noCIKL),-c(39,44,46,47)]
kmeans_row<-function(vector){
kmc<-kmeans(x = as.numeric(vector),centers=2)
high<-kmc$cluster[which.is.max(vector)]
if (high==2){return(kmc$cluster)
}else{return(abs(kmc$cluster-3))}
}
library(nnet)
ttg_segmentation_epimutable_genes_noCIKL<-matrix(0,nrow=nrow(ttg_DEseqnorm_epimutable_genes_noCIKL),ncol = 43)
colnames(ttg_segmentation_epimutable_genes_noCIKL)<-colnames(ttg_DEseqnorm_epimutable_genes_noCIKL)
rownames(ttg_segmentation_epimutable_genes_noCIKL)<-rownames(ttg_DEseqnorm_epimutable_genes_noCIKL)
for (gene in rownames(ttg_DEseqnorm_epimutable_genes_noCIKL)){
ttg_segmentation_epimutable_genes_noCIKL[gene,]<-kmeans_row(ttg_DEseqnorm_epimutable_genes_noCIKL[gene,])
}
heatmap.2(as.matrix(ttg_segmentation_epimutable_genes_noCIKL[,25:43]),col = colorRampPalette(c("#67a9cf","#de2d26")),Colv=FALSE,trace="none",dendrogram = "none")
heatmap.2(cor(as.matrix(ttg_segmentation_epimutable_genes_noCIKL[,25:43])),Colv=FALSE,trace="none",dendrogram = "none")
diffs<-c()
line1<-c()
line2<-c()
distances<-c()
for (l1 in colnames(ttg_segmentation_epimutable_genes_noCIKL[,25:43])){
for (l2 in colnames(ttg_segmentation_epimutable_genes_noCIKL[,25:43])){
diffs<-c(diffs,sum(abs(ttg_segmentation_epimutable_genes_noCIKL[,l1]-ttg_segmentation_epimutable_genes_noCIKL[,l2])))
line1<-c(line1,l1); line2<-c(line2,l2)
if (l1=="PMA"){l1<-"P0"}
if (l2=="PMA"){l2<-"P0"}
lineage_1<-substr(l1,start = 1,stop = 1)
gen_1<-as.numeric(substr(l1,start=2,stop=nchar(l1)))
lineage_2<-substr(l2,start = 1,stop = 1)
gen_2<-as.numeric(substr(l2,start=2,stop=nchar(l2)))
if (lineage_1!=lineage_2){
distance=abs(gen_1+gen_2)
}
else if (lineage_1==lineage_2){
distance=abs(gen_1-gen_2)
}
distances<-c(distances,distance)
if (l1=="P0"){l1<-"PMA"}
if (l2=="P0"){l2<-"PMA"}
}}
numchanges_df_noCIKL<-data.frame(num_changes=diffs,line1=line1,line2=line2,distance=distances)
ggplot(numchanges_df_noCIKL)+geom_boxplot(aes(y=num_changes,x=factor(distance),fill=factor(distance)))+scale_fill_brewer(aesthetics =c("fill"),palette = "GnBu")+theme_classic()
ggsave("number_of_changes_boxplot_gens25_100_withoutCIKL.pdf",dpi="retina")
## Saving 7 x 5 in image
numchanges_df_noCIKL_50_200<-numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==50 | numchanges_df_noCIKL$distance==200),]
ggplot(numchanges_df_noCIKL_50_200)+geom_boxplot(aes(y=num_changes,x=factor(distance),fill=factor(distance)),width=0.5)+scale_fill_manual(values=c("#66c2a5", "#8da0cb"))+theme_classic()
ggsave("number_of_changes_boxplot_withinbatch_withoutCIKL.pdf",dpi="retina")
## Saving 7 x 5 in image
ggplot(numchanges_df_noCIKL_50_200,aes(x=factor(distance),y=num_changes,fill=factor(distance),color=factor(distance)))+geom_dotplot(binaxis = "y", stackdir = "center")+ stat_summary(fun.y=median, geom="point", shape=19,size=5,col="black")+scale_fill_manual(values = c("#a8ddb5","#43a2ca"),aesthetics = c("fill","color"))+
theme_classic()
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
ggsave("number_of_changes_dotplot_withinbatch_withoutCIKL.pdf",dpi="retina")
## Saving 7 x 5 in image
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
#testing differences in the number of changes
wilcox.test(numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==50),"num_changes"],
numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==200),"num_changes"])
##
## Wilcoxon rank sum test with continuity correction
##
## data: numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 50), and numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 200), "num_changes"] and "num_changes"]
## W = 2296, p-value = 0.9556
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==25),"num_changes"],
numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==50),"num_changes"])
##
## Wilcoxon rank sum test with continuity correction
##
## data: numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 25), and numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 50), "num_changes"] and "num_changes"]
## W = 800, p-value = 0.01235
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==50),"num_changes"],
numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==75),"num_changes"])
##
## Wilcoxon rank sum test with continuity correction
##
## data: numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 50), and numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 75), "num_changes"] and "num_changes"]
## W = 412, p-value = 0.004741
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==75),"num_changes"],
numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==100),"num_changes"])
## Warning in
## wilcox.test.default(numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance
## == : cannot compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 75), and numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 100), "num_changes"] and "num_changes"]
## W = 66, p-value = 0.1466
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==100),"num_changes"],
numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==125),"num_changes"])
##
## Wilcoxon rank sum test with continuity correction
##
## data: numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 100), and numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 125), "num_changes"] and "num_changes"]
## W = 1048, p-value = 0.6713
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==125),"num_changes"],
numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==200),"num_changes"])
##
## Wilcoxon rank sum test with continuity correction
##
## data: numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 125), and numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 200), "num_changes"] and "num_changes"]
## W = 5128, p-value = 2.731e-13
## alternative hypothesis: true location shift is not equal to 0
plot_sRNA_levels<-function(gene,pval,padj,savestring){
ordered_data_points<-as.numeric(ttg_DEseqnorm_epimutable_genes[gene,c(36,25,37,26,38,27,39,28,40,29,41,30,42,31,43,32,44,33,45,34,46,35,47)])
data<-data.frame(ttg_counts=ordered_data_points,line=colnames(ttg_DEseqnorm_epimutable_genes)[c(36,25,37,26,38,27,39,28,40,29,41,30,42,31,43,32,44,33,45,34,46,35,47)])
data$sample<-factor(data$line,levels=colnames(ttg_DEseqnorm_epimutable_genes)[c(36,25,37,26,38,27,39,28,40,29,41,30,42,31,43,32,44,33,45,34,46,35,47)])
data$generation<-c("PeMA",rep(c("25","100"),11)); data$generation<-factor(data$generation,levels=c("PeMA","25","100"))
data$lineage<-c("PeMA","A","A","B","B","C","C","D","D","F","F","G","G","H","H","I","I","J","J","K","K","L","L")
data$lineage<-factor(data$lineage,levels=c("PeMA","A","B","C","D","F","G","H","I","J","K","L"))
print(ggplot(data)+geom_point(aes(y=ttg_counts,x=lineage,color=generation),size=3)+theme_classic()+ylab("normalised 22G counts")+xlab("lineage")+ggtitle(paste(paste(paste(paste(gene,"p",sep=","),pval,sep="="),"padj",sep=","),padj,sep="="))
+ scale_color_manual(values=c("#fc8d62", "#66c2a5", "#8da0cb")))
if (savestring=="no"){
return()}else{
ggsave(filename = paste(paste(paste("../04_Figure2/",savestring,sep=""),gene,sep="_"),"pdf",sep="."),dpi="retina")}
}
Having defined the high and low 22G level states, we can test on a gene-by-gene basis whether the states in the 25th and 100th generation samples are more similar than you would expect. To test this, I randomise the order of the 1-2 assignations 100000 times, and calculate how many of these would result in an equal or greater number of matches between 25 and 100 generation states than observed.
Also, to incorporate the quantitative range of the data in the analysis, I did a similar analysis calculating the within-lineage variation vs overall variation in random matches of 25 and 100 generation samples.
Both analyses converge on the conclusion that there is no evidence for long-term inheritance in our dataset.
#filter out genes with all 1s or 2s in the 25th/100th gen samples
test_epigen_inheritance<-function(row){
num_matches<-11-sum(abs(row[25:35]-row[37:47]))
rand_matches_distribution<-rep(0,100000)
for (i in seq(100000)){
rand_25<-sample(row[25:35]); rand_100<-sample(row[37:47])
rand_matches<-11-sum(abs(rand_25-rand_100))
rand_matches_distribution[i]<-rand_matches
}
return(c(num_matches,length(which(rand_matches_distribution>=num_matches))/length(rand_matches_distribution)))
}
test_df<-data.frame(t(apply(ttg_segmentation_epimutable_genes,MARGIN=1,FUN=test_epigen_inheritance)))
colnames(test_df)<-c("num_matches","pval")
test_df$padj<-p.adjust(test_df$pval,method="fdr")
ggplot(test_df)+geom_histogram(aes(x=pval),breaks=seq(0,1,1/30))
ggsave("../04_Figure2/epigenetic_inheritance_test_kmeans_withCILK.pdf")
## Saving 7 x 5 in image
ggplot(test_df)+geom_histogram(aes(x=padj),breaks=seq(0,1,1/30))
ggsave("../04_Figure2/epigenetic_inheritance_test_kmeans_withCILK_padj.pdf")
## Saving 7 x 5 in image
ordered_test_df<-test_df[order(test_df$pval,decreasing = FALSE),]
ordered_test_df[1:10,]
## num_matches pval padj
## Y116F11A.1 10 0.02399 1
## C39B5.3 9 0.04462 1
## F58E10.7 10 0.05573 1
## Y43F8B.22 9 0.06221 1
## C27D8.2 11 0.09028 1
## Y76B12C.4a 11 0.09065 1
## F52C9.1.2 11 0.09075 1
## Y17D7C.3 8 0.10706 1
## Y17D7C.4 9 0.10903 1
## W01A11.3c 9 0.11103 1
write.table(ordered_test_df,"epistates_test_df_withCILK.txt")
for (gene in rownames(ordered_test_df[1:10,])){
plot_sRNA_levels(gene,ordered_test_df[gene,"pval"],ordered_test_df[gene,"padj"],
savestring = "epistates_test_CILK/epistates_test_top10")
}
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
test_epigen_inheritance_variance<-function(row){
num_matches<-sum((row[25:35]-row[37:47])**2)/11
rand_matches_distribution<-rep(0,100000)
for (i in seq(100000)){
rand_25<-sample(row[25:35]); rand_100<-sample(row[37:47])
rand_variance<-sum((rand_25-rand_100)**2)/11
rand_matches_distribution[i]<-rand_variance
}
return(c(num_matches,length(which(rand_matches_distribution<=num_matches))/length(rand_matches_distribution)))
}
test_variance_df<-data.frame(t(apply(ttg_DEseqnorm_epimutable_genes,MARGIN=1,FUN=test_epigen_inheritance_variance)))
colnames(test_variance_df)<-c("num_matches","pval")
test_variance_df$padj<-p.adjust(test_variance_df$pval,method="fdr")
ggplot(test_variance_df)+geom_histogram(aes(x=pval),breaks=seq(0,1,1/30))
ggsave("../04_Figure2/epigenetic_inheritance_test_variance_withCILK.pdf")
## Saving 7 x 5 in image
ggplot(test_variance_df)+geom_histogram(aes(x=padj),breaks=seq(0,1,1/30))
ggsave("../04_Figure2/epigenetic_inheritance_test_variance_withCILK_padj.pdf")
## Saving 7 x 5 in image
ordered_test_variance_df<-test_variance_df[order(test_variance_df$pval,decreasing = FALSE),]
ordered_test_variance_df[1:10,]
## num_matches pval padj
## F58G6.3 5.989801e+01 0.00013 0.0574600
## Y17D7C.3 4.929410e+06 0.00219 0.4839900
## F54F7.7 3.029012e+05 0.00337 0.4965133
## C27D8.2 4.556021e+01 0.00597 0.5134567
## K10D2.8 1.221207e+05 0.00611 0.5134567
## ZK228.1 3.681626e+04 0.00726 0.5134567
## R02F2.2 2.545700e+03 0.01259 0.5134567
## F35H10.5 4.067716e+02 0.01284 0.5134567
## F58E10.7 2.567370e+04 0.01332 0.5134567
## C48D1.6 1.167289e+03 0.01356 0.5134567
write.table(ordered_test_variance_df,"variance_test_df_withCILK.txt")
for (gene in rownames(ordered_test_variance_df[1:10,])){
plot_sRNA_levels(gene,ordered_test_variance_df[gene,"pval"],ordered_test_variance_df[gene,"padj"],
savestring = "variance_test_CILK/variance_test_top10")
}
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
#filter out genes with all 1s or 2s in the 25th/100th gen samples
colnames(ttg_segmentation_epimutable_genes)[c(25,26,28:31,33,37,38,40:43,45)]
## [1] "A25" "B25" "D25" "F25" "G25" "H25" "J25" "A100" "B100" "D100"
## [11] "F100" "G100" "H100" "J100"
test_epigen_inheritance<-function(row){
num_matches<-11-sum(abs(row[c(25,26,28:31,33)]-row[c(37,38,40:43,45)]))
rand_matches_distribution<-rep(0,100000)
for (i in seq(100000)){
rand_25<-sample(row[c(25,26,28:31,33)]); rand_100<-sample(row[c(37,38,40:43,45)])
rand_matches<-11-sum(abs(rand_25-rand_100))
rand_matches_distribution[i]<-rand_matches
}
return(c(num_matches,length(which(rand_matches_distribution>=num_matches))/length(rand_matches_distribution)))
}
test_df_noCILK<-data.frame(t(apply(ttg_segmentation_epimutable_genes,MARGIN=1,FUN=test_epigen_inheritance)))
colnames(test_df_noCILK)<-c("num_matches","pval")
test_df_noCILK$padj<-p.adjust(test_df_noCILK$pval,method="fdr")
ggplot(test_df_noCILK)+geom_histogram(aes(x=pval),breaks=seq(0,1,1/30))
ggsave("../04_Figure2/epigenetic_inheritance_test_kmeans_withoutCILK.pdf")
## Saving 7 x 5 in image
ggplot(test_df_noCILK)+geom_histogram(aes(x=padj),breaks=seq(0,1,1/30))
ggsave("../04_Figure2/epigenetic_inheritance_test_kmeans_withoutCILK_padj.pdf")
## Saving 7 x 5 in image
ordered_test_df_noCILK<-test_df_noCILK[order(test_df_noCILK$pval,decreasing = FALSE),]
ordered_test_df_noCILK[1:10,]
## num_matches pval padj
## Y116F11A.1 11 0.02806 1
## K11D2.2.1 10 0.11629 1
## F58E10.7 11 0.14137 1
## C27D8.2 11 0.14328 1
## K10D2.8 11 0.14386 1
## F52C9.1.2 11 0.14398 1
## F56D6.15 10 0.14488 1
## T23F6.3a 10 0.28199 1
## Y17D7C.3 9 0.28335 1
## Y49F6A.10 9 0.28350 1
for (gene in rownames(ordered_test_df_noCILK[1:10,])){
plot_sRNA_levels(gene,ordered_test_df_noCILK[gene,"pval"],ordered_test_df_noCILK[gene,"padj"],
savestring = "epistates_test_noCILK/epistates_test_noCILK_top10")
}
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
write.table(ordered_test_df_noCILK,"epistates_test_df_noCILK.txt")
test_epigen_inheritance_variance<-function(row){
num_matches<-sum((row[c(25,26,28:31,33)]-row[c(37,38,40:43,45)])**2)/11
rand_matches_distribution<-rep(0,100000)
for (i in seq(100000)){
rand_25<-sample(row[c(25,26,28:31,33)]); rand_100<-sample(row[c(37,38,40:43,45)])
rand_variance<-sum((rand_25-rand_100)**2)/11
rand_matches_distribution[i]<-rand_variance
}
return(c(num_matches,length(which(rand_matches_distribution<=num_matches))/length(rand_matches_distribution)))
}
test_variance_df_noCILK<-data.frame(t(apply(ttg_DEseqnorm_epimutable_genes,MARGIN=1,FUN=test_epigen_inheritance_variance)))
colnames(test_variance_df_noCILK)<-c("num_matches","pval")
test_variance_df_noCILK$padj<-p.adjust(test_variance_df_noCILK$pval,method="fdr")
ggplot(test_variance_df_noCILK)+geom_histogram(aes(x=pval),breaks=seq(0,1,1/30))
ggsave("../04_Figure2/epigenetic_inheritance_test_variance_withoutCILK.pdf")
## Saving 7 x 5 in image
ggplot(test_variance_df_noCILK)+geom_histogram(aes(x=padj),breaks=seq(0,1,1/30))
ggsave("../04_Figure2/epigenetic_inheritance_test_variance_withoutCILK_padj.pdf")
## Saving 7 x 5 in image
ordered_test_variance_df_noCILK<-test_variance_df_noCILK[order(test_variance_df_noCILK$pval,decreasing = FALSE),]
ordered_test_variance_df_noCILK[1:10,]
## num_matches pval padj
## C27D8.2 3.965678e+01 0.00260 0.4196544
## R02F2.2 8.524297e+00 0.00292 0.4196544
## F56D6.15 8.313818e+02 0.00392 0.4196544
## K09E3.6 1.957955e+03 0.00601 0.4196544
## Y17D7C.3 4.400638e+06 0.00633 0.4196544
## Y37H2B.1 1.091051e+03 0.00685 0.4196544
## F07B7.12 5.266671e+00 0.00691 0.4196544
## F54F7.7 1.751573e+04 0.00835 0.4196544
## F21D9.7 5.991030e+04 0.01108 0.4196544
## C46G7.3 3.133321e+04 0.01341 0.4196544
write.table(ordered_test_variance_df_noCILK,"variance_test_df_noCILK.txt")
for (gene in rownames(ordered_test_variance_df_noCILK[1:10,])){
plot_sRNA_levels(gene,ordered_test_variance_df_noCILK[gene,"pval"],ordered_test_variance_df_noCILK[gene,"padj"],
savestring = "variance_test_noCILK/variance_test_noCILK_top10")
}
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
plot_sRNA_levels<-function(gene,pval,padj,N,savestring){
ordered_data_points<-as.numeric(ttg_DEseqnorm_epimutable_genes[gene,c(36,25,37,26,38,27,39,28,40,29,41,30,42,31,43,32,44,33,45,34,46,35,47)])
data<-data.frame(ttg_counts=ordered_data_points,line=colnames(ttg_DEseqnorm_epimutable_genes)[c(36,25,37,26,38,27,39,28,40,29,41,30,42,31,43,32,44,33,45,34,46,35,47)])
data$sample<-factor(data$line,levels=colnames(ttg_DEseqnorm_epimutable_genes)[c(36,25,37,26,38,27,39,28,40,29,41,30,42,31,43,32,44,33,45,34,46,35,47)])
data$generation<-c("PeMA",rep(c("25","100"),11)); data$generation<-factor(data$generation,levels=c("PeMA","25","100"))
data$lineage<-c("PeMA","A","A","B","B","C","C","D","D","F","F","G","G","H","H","I","I","J","J","K","K","L","L")
data$lineage<-factor(data$lineage,levels=c("PeMA","A","B","C","D","F","G","H","I","J","K","L"))
print(ggplot(data)+geom_point(aes(y=ttg_counts,x=lineage,color=generation),size=3)+theme_classic()+ylab("normalised 22G counts")+xlab("lineage")+ggtitle(paste(paste(paste(paste(paste(paste(gene,"p variance test",sep=","),pval,sep="="),"p epistates test",sep=","),padj,sep="="),"number of matches",sep=""),N,sep="="))
+ scale_color_manual(values=c("#fc8d62", "#66c2a5", "#8da0cb")))
if (savestring=="no"){
return()}else{
ggsave(filename = paste(paste(paste("../04_Figure2/",savestring,sep=""),gene,sep="_"),"pdf",sep="."),dpi="retina")}
}
plot_sRNA_levels("F52C9.1.2",0.07446,0.08984,11,"plots_for_suppfig2/")
## Saving 7 x 5 in image
plot_sRNA_levels("Y116F11A.1",0.02376,0.02414,10,"plots_for_suppfig2/")
## Saving 7 x 5 in image
plot_sRNA_levels("K10D2.8",0.00589,0.18263,10,"plots_for_suppfig2/")
## Saving 7 x 5 in image
plot_sRNA_levels("C27D8.2",0.00624,0.09163,11,"plots_for_suppfig2/")
## Saving 7 x 5 in image
ttg_MAplot_data_p1Eminus4[which(ttg_MAplot_data_p1Eminus4$line2=="PMA" & ttg_MAplot_data_p1Eminus4$fc<0),]
## ID mean fc padj line1 line2
## 319 T03F6.1 12.825567 -1.659804 1.882552e-08 A25 PMA
## 321 T10D4.5 5.911864 -4.089286 1.050306e-15 A25 PMA
## 323 Y65B4BL.7a 4.866717 -3.072315 7.356761e-07 A25 PMA
## 1202 C07G3.9.2 10.130461 -1.805331 2.619946e-11 B25 PMA
## 1207 C55C3.7 7.043928 -5.346549 4.277422e-77 B25 PMA
## 1212 F23B12.4c.2 7.958701 -1.367157 6.647172e-05 B25 PMA
## 1216 F42G8.5 7.342828 -2.372058 2.932981e-15 B25 PMA
## 1218 K10G9.2 9.279968 -1.425779 4.216102e-06 B25 PMA
## 1220 M01E11.4c 8.602249 -1.683475 2.646414e-08 B25 PMA
## 1229 Y65B4BL.7a 5.205076 -3.446094 2.454591e-16 B25 PMA
## 1232 ZC190.7 4.369778 -2.845912 3.166586e-07 B25 PMA
## 1233 ZC190.8 6.214683 -3.120048 2.422865e-19 B25 PMA
## 1737 C02F5.10 8.041969 -2.554854 1.720883e-07 C25 PMA
## 1742 F40D4.17 8.285054 -2.421074 6.462507e-07 C25 PMA
## 1744 T03F6.1 12.929700 -1.795386 1.576476e-14 C25 PMA
## 1747 T10D4.5 5.228074 -3.353165 8.100023e-07 C25 PMA
## 2562 B0261.8 11.014256 -1.298769 2.999386e-07 D25 PMA
## 2565 C26C6.3 9.860089 -1.473888 3.266113e-08 D25 PMA
## 2567 C46A5.8 7.306049 -1.458529 2.571334e-07 D25 PMA
## 2576 F52C9.1.2 8.203823 -1.711948 9.849541e-11 D25 PMA
## 2577 F55G1.10 7.965168 -3.169797 3.174355e-40 D25 PMA
## 2579 H14A12.5 8.684057 -3.963749 2.709580e-62 D25 PMA
## 2583 T23D8.6 6.904268 -1.937391 1.658645e-13 D25 PMA
## 2586 W02B8.3 6.935998 -2.319795 7.087033e-20 D25 PMA
## 2590 Y65B4BL.7a 5.096796 -3.327460 4.787028e-18 D25 PMA
## 3107 B0511.3 7.308011 -3.491121 2.154514e-20 F25 PMA
## 3108 B0511.4 5.608469 -5.190702 4.650496e-31 F25 PMA
## 3110 C07G1.7 3.649766 -4.591119 1.593172e-09 F25 PMA
## 3116 T03F6.1 12.975479 -1.854088 1.275609e-11 F25 PMA
## 3121 T20F5.5 7.501455 -2.814376 3.027300e-13 F25 PMA
## 3816 C27D8.1 5.343421 -6.325543 1.389595e-26 G25 PMA
## 3818 C36B7.6 7.079757 -2.675156 3.177847e-06 G25 PMA
## 3823 F46F5.3 5.712026 -2.926872 4.701529e-05 G25 PMA
## 3827 F55G1.10 7.445012 -2.578312 3.177847e-06 G25 PMA
## 3829 H14A12.5 7.224404 -2.332426 6.941691e-05 G25 PMA
## 3831 M01E11.4c 9.009509 -2.197179 5.880084e-05 G25 PMA
## 3836 T28F2.1 4.427532 -4.490946 8.460361e-09 G25 PMA
## 3843 Y60A3A.24 5.660550 -2.905159 5.880084e-05 G25 PMA
## 4229 C07G1.7 3.268587 -4.191724 2.045709e-05 H25 PMA
## 4232 C54E10.1 4.764621 -4.296434 2.059473e-10 H25 PMA
## 4236 T03F6.1 12.736277 -1.541063 1.468405e-06 H25 PMA
## 4238 T10D4.5 5.333945 -3.468990 4.531941e-08 H25 PMA
## 4904 C07G1.7 2.990848 -3.897126 1.077536e-05 I25 PMA
## 4905 C16C8.20 7.036072 -1.857768 6.197790e-06 I25 PMA
## 4909 C54E10.1 3.703600 -3.153358 4.882505e-05 I25 PMA
## 4912 F23B12.4c.2 8.490022 -2.061240 4.818831e-09 I25 PMA
## 4914 T03F6.1 12.829266 -1.664671 4.818831e-09 I25 PMA
## 4922 Y37E3.5c 7.177292 -2.135552 2.195466e-08 I25 PMA
## 5376 C28A5.6 7.665463 -1.922619 1.049815e-08 J25 PMA
## 5379 C56G7.3c 8.620526 -2.120803 1.416419e-11 J25 PMA
## 5382 F22F1.2 6.934125 -1.888810 2.004784e-07 J25 PMA
## 5383 F23B12.4c.2 8.701787 -2.319442 5.416374e-14 J25 PMA
## 5384 F31C3.11 5.944217 -1.914252 3.902400e-05 J25 PMA
## 5386 F39D8.7 10.655019 -1.330376 2.163909e-05 J25 PMA
## 5387 F49E11.2 7.689390 -1.598116 9.696719e-06 J25 PMA
## 5388 T03F6.1 12.668111 -1.448709 1.049815e-08 J25 PMA
## 5399 Y65B4BL.7a 4.900947 -3.110561 2.940171e-09 J25 PMA
## 5798 C07G1.7 3.829594 -4.777943 4.546856e-11 K25 PMA
## 5800 C16C8.20 7.194230 -2.056674 6.488058e-06 K25 PMA
## 5805 C48B6.9 6.023986 -3.986632 6.933766e-20 K25 PMA
## 5806 C54E10.1 3.900281 -3.370558 3.036123e-05 K25 PMA
## 5807 C56G7.3c 8.741887 -2.268681 8.147561e-08 K25 PMA
## 5810 F45H10.1b 6.974302 -1.956925 2.954829e-05 K25 PMA
## 5812 F55G1.10 8.254398 -3.487860 1.871137e-19 K25 PMA
## 5814 K07H8.6c 4.948040 -2.756077 1.122571e-05 K25 PMA
## 5817 M01E11.4c 8.992328 -2.176224 2.932948e-07 K25 PMA
## 5820 R186.1 7.590265 -2.031059 6.488058e-06 K25 PMA
## 5821 T05D4.1.2 5.947429 -2.915467 7.181512e-10 K25 PMA
## 5825 W04B5.3c 8.420392 -1.818887 9.585423e-05 K25 PMA
## 5835 ZK1098.7 8.047849 -2.034949 5.377481e-06 K25 PMA
## 6199 B0511.5 6.771525 -2.240504 9.208740e-06 L25 PMA
## 6200 C07G1.7 3.289531 -4.213805 2.515365e-06 L25 PMA
## 6202 C36B7.6 7.023999 -2.610469 5.983600e-08 L25 PMA
## 6204 C49G7.11 5.227873 -3.095880 9.254738e-07 L25 PMA
## 6209 F47A4.5 7.106980 -2.222723 3.406153e-06 L25 PMA
## 6210 F48E3.9 6.576009 -2.127470 8.215926e-05 L25 PMA
## 6214 F55G1.10 7.345425 -2.461358 9.449843e-08 L25 PMA
## 6216 F58E10.7 7.358035 -2.026011 2.271277e-05 L25 PMA
## 6222 R10F2.1 11.391570 -1.771338 2.515365e-06 L25 PMA
## 6223 T03F6.1 12.915116 -1.776573 1.028825e-06 L25 PMA
## 6231 Y60A3A.24 5.643993 -2.886377 9.254738e-07 L25 PMA